1 Loading packages

library(httr)
library(jsonlite)
options(stringsAsFactors = FALSE)

2 Getting web data

# Download the YouBike feed and parse its JSON body into a nested R list.
url <- "http://data.taipei/youbike"
response <- GET(url)
# Fail loudly on an HTTP error status (4xx/5xx) instead of passing an error
# page on to the JSON parser, where the failure would be much harder to read.
stop_for_status(response)
ubike.list <- fromJSON(content(response, as = "text", encoding = "utf-8"))

# Take a quick look at the structure of the parsed response.
# The `##` lines below are console output recorded when this was last run.
class(ubike.list) # list
## [1] "list"
# Alternative inspections, left disabled:
# head(ubike.list)
# as.data.frame(ubike.list)
# Top-level elements of the response.
names(ubike.list)
## [1] "retCode" "retVal"
# retVal is keyed by zero-padded IDs (presumably station numbers -- confirm
# against the feed's documentation); show the first six keys.
head(names(ubike.list$retVal))
## [1] "0001" "0002" "0003" "0004" "0005" "0006"

3 list -> vector -> matrix -> data.frame

NA

NA

NA

NA

# Select the node holding the records and flatten it --> one long vector
ubike.v <- unlist(ubike.list$retVal)

# Number of fields per record, taken from the first record rather than
# hard-coded, so the reshaping keeps working if the feed adds or drops a field.
n.fields <- length(ubike.list$retVal[[1L]])
# matrix() silently recycles values when the length does not divide evenly;
# stop here instead of building a misaligned table.
stopifnot(n.fields > 0L, length(ubike.v) %% n.fields == 0L)

# Fold the vector row by row, one record per row --> matrix
ubike.m <- matrix(ubike.v, byrow = TRUE, ncol = n.fields)

# Convert the matrix to a data frame (one column per field)
ubike.df <- as.data.frame(ubike.m)

4 Assigning variable names

NA

NA

# names(ubike.df)  # uncomment to inspect the current column names
# Label the columns with the field names of record "0001".
colnames(ubike.df) <- names(ubike.list$retVal[["0001"]])

5 Converting character vectors to numeric

NA NA NA

# These four columns arrive as text; convert them to numeric in one pass.
numeric.cols <- c("lng", "lat", "tot", "sbi")
ubike.df[numeric.cols] <- lapply(ubike.df[numeric.cols], as.numeric)

6 Creating a new variable

NA

# Derive ratio = sbi / tot for every row, then print per-column summaries.
ubike.df[["ratio"]] <- with(ubike.df, sbi / tot)
summary(ubike.df)
##      sno                sna                 tot             sbi      
##  Length:401         Length:401         Min.   : 22.0   Min.   : 0.0  
##  Class :character   Class :character   1st Qu.: 32.0   1st Qu.: 8.0  
##  Mode  :character   Mode  :character   Median : 38.0   Median :15.0  
##                                        Mean   : 40.3   Mean   :16.2  
##                                        3rd Qu.: 46.0   3rd Qu.:22.0  
##                                        Max.   :180.0   Max.   :65.0  
##                                                                      
##     sarea               mday                lat             lng       
##  Length:401         Length:401         Min.   :24.98   Min.   :121.5  
##  Class :character   Class :character   1st Qu.:25.03   1st Qu.:121.5  
##  Mode  :character   Mode  :character   Median :25.05   Median :121.5  
##                                        Mean   :25.05   Mean   :121.5  
##                                        3rd Qu.:25.07   3rd Qu.:121.6  
##                                        Max.   :25.15   Max.   :121.6  
##                                        NA's   :1       NA's   :1      
##       ar              sareaen             snaen          
##  Length:401         Length:401         Length:401        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##      aren               bemp               act                ratio       
##  Length:401         Length:401         Length:401         Min.   :0.0000  
##  Class :character   Class :character   Class :character   1st Qu.:0.2273  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.4062  
##                                                           Mean   :0.4028  
##                                                           3rd Qu.:0.5667  
##                                                           Max.   :1.0000  
## 

7 Mapping with ggmap

NA

NA

NA

library(ggplot2)
library(ggmap)

# Fetch a terrain basemap centred on the given (lon, lat) pair, then overlay
# one translucent red point per row, sized by tot / 10.
taipei.map <- get_googlemap(
  center = c(121.516898, 25.055536),
  zoom = 12,
  maptype = "terrain"
)
ggmap(taipei.map) +
  geom_point(
    data = ubike.df,
    mapping = aes(x = lng, y = lat),
    colour = "red",
    size = ubike.df$tot / 10,
    alpha = 0.4
  )
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=25.055536,121.516898&zoom=12&size=640x640&scale=2&maptype=terrain&sensor=false
## Warning: Removed 1 rows containing missing values (geom_point).

8 Assigning color according to ratio levels

NA

NA

NA

NA

NA

NA

* lapply returns a list of the same length as X, each element of which is the result of applying FUN to the corresponding element of X.
* sapply is a user-friendly version and wrapper of lapply that by default returns a vector.

#' Map a ratio to a display colour.
#'
#' Vectorised: accepts a single ratio or a whole vector of them.
#'
#' @param ratio Numeric vector of ratios.
#' @return A character vector the same length as `ratio`:
#'   `"#FF0000"` (red) where `ratio > 0.8`, `"#0000FF"` (blue) where
#'   `ratio < 0.2`, `"#00FF00"` (green) otherwise, and `NA` where `ratio`
#'   is missing (`NA` or `NaN`).
assignColor <- function(ratio) {
  colour <- rep("#00FF00", length(ratio)) # green is the default band
  # which() drops NA comparisons, so missing ratios never select a band here.
  colour[which(ratio > 0.8)] <- "#FF0000" # red
  colour[which(ratio < 0.2)] <- "#0000FF" # blue
  # A missing ratio has no meaningful colour; a scalar `if` would instead
  # abort with "missing value where TRUE/FALSE needed".
  colour[is.na(ratio)] <- NA_character_
  colour
}

# Colour every row by its ratio. vapply() checks that each call yields exactly
# one string and always returns a character vector, whereas sapply() would
# return an empty list if the data frame had no rows.
ubike.df$color <- vapply(ubike.df$ratio, assignColor, character(1))

9 Recoloring geom_point by ratio

NA

# Same terrain basemap as before; this time each point takes the per-row
# colour stored in ubike.df$color, and is still sized by tot / 10.
base.map <- ggmap(
  get_googlemap(
    center = c(121.516898, 25.055536),
    zoom = 12,
    maptype = "terrain"
  )
)
base.map +
  geom_point(
    data = ubike.df,
    mapping = aes(lng, lat),
    colour = ubike.df$color,
    size = ubike.df$tot / 10,
    alpha = 0.4
  )
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=25.055536,121.516898&zoom=12&size=640x640&scale=2&maptype=terrain&sensor=false
## Warning: Removed 1 rows containing missing values (geom_point).